Import necessary packages

# Packages this notebook depends on.
packages <- c(
    "dplyr",
    "ggplot2",
    "plotly",
    "data.table",
    "readr"
)

# Attach every package. `require()` returns FALSE (with a warning) instead of
# stopping when a package is missing, so check that every entry printed below
# is TRUE before continuing. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
lapply(packages, require, character.only = TRUE)
[[1]]
[1] TRUE

[[2]]
[1] TRUE

[[3]]
[1] TRUE

[[4]]
[1] TRUE

[[5]]
[1] TRUE

Import the economist data

# Load the CSV; the path is resolved relative to the current working directory
df <- readr::read_csv(file = "datasets/Economist_Assignment_Data.csv")
New names:Rows: 173 Columns: 6── Column specification ──────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Country, Region
dbl (4): ...1, HDI.Rank, HDI, CPI
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Remove the leading column by position (readr auto-named it `...1` on import)
df <- df[-1]

Explore the data

# Preview the first six rows, then list every column with its type
utils::head(df, n = 6L)
dplyr::glimpse(df)
Rows: 173
Columns: 5
$ Country  <chr> "Afghanistan", "Albania", "Algeria", "Angola", "Argentina", "Armenia", "Australia",…
$ HDI.Rank <dbl> 172, 70, 96, 148, 45, 86, 2, 19, 91, 53, 42, 146, 47, 65, 18, 167, 141, 108, 74, 11…
$ HDI      <dbl> 0.398, 0.739, 0.698, 0.486, 0.797, 0.716, 0.929, 0.885, 0.700, 0.771, 0.806, 0.500,…
$ CPI      <dbl> 1.5, 3.1, 2.9, 2.0, 3.0, 2.6, 8.8, 7.8, 2.4, 7.3, 5.1, 2.7, 7.8, 2.4, 7.5, 3.0, 5.7…
$ Region   <chr> "Asia Pacific", "East EU Cemt Asia", "MENA", "SSA", "Americas", "East EU Cemt Asia"…
# Per-column summary statistics
base::summary(df)
   Country             HDI.Rank           HDI              CPI           Region         
 Length:173         Min.   :  1.00   Min.   :0.2860   Min.   :1.500   Length:173        
 Class :character   1st Qu.: 47.00   1st Qu.:0.5090   1st Qu.:2.500   Class :character  
 Mode  :character   Median : 96.00   Median :0.6980   Median :3.200   Mode  :character  
                    Mean   : 95.28   Mean   :0.6581   Mean   :4.052                     
                    3rd Qu.:143.00   3rd Qu.:0.7930   3rd Qu.:5.100                     
                    Max.   :187.00   Max.   :0.9430   Max.   :9.500                     

Transform the data

# Country and Region are categorical labels, so store them as factors
df <- mutate(
    df,
    Country = factor(Country),
    Region = factor(Region)
)

Check data after transform

# Preview the first six rows again and confirm the new column types
utils::head(df, n = 6L)
dplyr::glimpse(df)
Rows: 173
Columns: 5
$ Country  <fct> Afghanistan, Albania, Algeria, Angola, Argentina, Armenia, Australia, Austria, Azer…
$ HDI.Rank <dbl> 172, 70, 96, 148, 45, 86, 2, 19, 91, 53, 42, 146, 47, 65, 18, 167, 141, 108, 74, 11…
$ HDI      <dbl> 0.398, 0.739, 0.698, 0.486, 0.797, 0.716, 0.929, 0.885, 0.700, 0.771, 0.806, 0.500,…
$ CPI      <dbl> 1.5, 3.1, 2.9, 2.0, 3.0, 2.6, 8.8, 7.8, 2.4, 7.3, 5.1, 2.7, 7.8, 2.4, 7.5, 3.0, 5.7…
$ Region   <fct> Asia Pacific, East EU Cemt Asia, MENA, SSA, Americas, East EU Cemt Asia, Asia Pacif…
# Factor columns are now summarised as counts per level
base::summary(df)
        Country       HDI.Rank           HDI              CPI                      Region  
 Afghanistan:  1   Min.   :  1.00   Min.   :0.2860   Min.   :1.500   Americas         :31  
 Albania    :  1   1st Qu.: 47.00   1st Qu.:0.5090   1st Qu.:2.500   Asia Pacific     :30  
 Algeria    :  1   Median : 96.00   Median :0.6980   Median :3.200   East EU Cemt Asia:18  
 Angola     :  1   Mean   : 95.28   Mean   :0.6581   Mean   :4.052   EU W. Europe     :30  
 Argentina  :  1   3rd Qu.:143.00   3rd Qu.:0.7930   3rd Qu.:5.100   MENA             :18  
 Armenia    :  1   Max.   :187.00   Max.   :0.9430   Max.   :9.500   SSA              :46  
 (Other)    :167                                                                           

Plot graph from the data

# Scatter plot of HDI against CPI: one point per row of `df`, coloured by
# Region, overlaid with a single red curve fitted as HDI ~ log(CPI).
# The `text` aesthetic is not drawn by ggplot2 itself; it carries the country
# label along with each point (e.g. for tooltips once the plot is converted
# with ggplotly()).
pl <- df %>%
    ggplot(aes(x = CPI, y = HDI, text = paste("Country:", Country))) +
    geom_point(aes(color = Region), size = 3) +
    # group = 1 forces one fit over all rows: ggplot2 groups by the discrete
    # variables mapped in the plot, and the inherited `text` mapping is
    # distinct per country, which would otherwise split the data.
    # `FALSE` is spelled out because `F` is a reassignable variable.
    geom_smooth(aes(group = 1),
                color = "red",
                se = FALSE,
                method = "lm",
                formula = y ~ log(x)) +
    # Fixed axis ranges with a tick at every whole CPI value / every 0.1 HDI
    scale_x_continuous(name = "Corruption Perceptions Index, 2011 (10 = least corrupt)",
                       limits = c(1, 10),
                       breaks = 1:10) +
    scale_y_continuous(name = "Human Development Index, 2011 (1 = best)",
                       limits = c(0.2, 1),
                       breaks = seq(0.2, 1, 0.1)) +
    ggtitle("Corruption and human development") +
    theme_bw()

Create an interactive graph

# Convert the static ggplot object into an interactive plotly widget
ggplotly(p = pl)
LS0tDQp0aXRsZTogIkRhdGEgdmlzdWFsaXphdGlvbiBwcm9qZWN0Ig0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KSW1wb3J0IG5lY2Vzc2FyeSBwYWNrYWdlcw0KDQpgYGB7cn0NCnBhY2thZ2VzIDwtIGMoDQogICAgImRwbHlyIiwNCiAgICAiZ2dwbG90MiIsDQogICAgInBsb3RseSIsDQogICAgImRhdGEudGFibGUiLA0KICAgICJyZWFkciINCikNCg0KbGFwcGx5KHBhY2thZ2VzLCByZXF1aXJlLCBjaGFyYWN0ZXIub25seSA9IFQpDQpgYGANCg0KSW1wb3J0IHRoZSBlY29ub21pc3QgZGF0YQ0KDQpgYGB7cn0NCmRmIDwtIHJlYWRfY3N2KCJkYXRhc2V0cy9FY29ub21pc3RfQXNzaWdubWVudF9EYXRhLmNzdiIpDQoNCiMgZHJvcCBmaXJzdCBjb2x1bW4NCmRmIDwtIGRmWywgLTFdDQpgYGANCg0KRXhwbG9yZSB0aGUgZGF0YQ0KDQpgYGB7cn0NCmhlYWQoZGYpDQpnbGltcHNlKGRmKQ0Kc3VtbWFyeShkZikNCmBgYA0KDQpUcmFuc2Zvcm0gdGhlIGRhdGENCg0KYGBge3J9DQpkZiA8LSBkZiAlPiUgbXV0YXRlKENvdW50cnkgPSBmYWN0b3IoQ291bnRyeSksIFJlZ2lvbiA9IGZhY3RvcihSZWdpb24pKQ0KYGBgDQoNCkNoZWNrIGRhdGEgYWZ0ZXIgdHJhbmZvcm0NCg0KYGBge3J9DQpoZWFkKGRmKQ0KZ2xpbXBzZShkZikNCnN1bW1hcnkoZGYpDQpgYGANCg0KUGxvdCBncmFwaCBmcm9tIHRoZSBkYXRhDQoNCmBgYHtyfQ0KcGwgPC0gZGYgJT4lIA0KICAgIGdncGxvdChhZXMoeCA9IENQSSwgeSA9IEhESSwgdGV4dCA9IHBhc3RlKCJDb3VudHJ5OiIsIENvdW50cnkpKSkgKw0KICAgIGdlb21fcG9pbnQoYWVzKGNvbG9yID0gUmVnaW9uKSwgc2l6ZSA9IDMpICsNCiAgICBnZW9tX3Ntb290aChhZXMoZ3JvdXAgPSAxKSwgDQogICAgICAgICAgICAgICAgY29sb3IgPSAicmVkIiwgDQogICAgICAgICAgICAgICAgc2UgPSBGLCANCiAgICAgICAgICAgICAgICBtZXRob2QgPSAibG0iLCANCiAgICAgICAgICAgICAgICBmb3JtdWxhID0geSB+IGxvZyh4KSkgKw0KICAgIHNjYWxlX3hfY29udGludW91cyhuYW1lID0gIkNvcnJ1cHRpb24gUGVyY2VwdGlvbnMgSW5kZXgsIDIwMTEgKDEwID0gbGVhc3QgY29ycnVwdCkiLCANCiAgICAgICAgICAgICAgICAgICAgICAgbGltaXRzID0gYygxLCAxMCksIA0KICAgICAgICAgICAgICAgICAgICAgICBicmVha3MgPSAxOjEwKSArDQogICAgc2NhbGVfeV9jb250aW51b3VzKG5hbWUgPSAiSHVtYW4gRGV2ZWxvcG1lbnQgSW5kZXgsIDIwMTEgKDEgPSBiZXN0KSIsDQogICAgICAgICAgICAgICAgICAgICAgIGxpbWl0cyA9IGMoMC4yLCAxKSwNCiAgICAgICAgICAgICAgICAgICAgICAgYnJlYWtzID0gc2VxKDAuMiwgMSwgMC4xKSkgKw0KICAgIGdndGl0bGUoIkNvcnJ1cHRpb24gYW5kIGh1bWFuIGRldmVsb3BtZW50IikgKyANCiAgICB0aGVtZV9idygpDQpgYGANCg0KQ3JlYXRlIGFuIGludGVyYWN0aXZlIGdyYXBoDQoNCmBgYHtyfQ0KZ2dwbG90bHkocGwpDQpgYGANCg==